GOAL: annotate cell types and define expanded clonotypes (B cells and T cells)
# Set Working Directory before starting
# Immcantation
suppressPackageStartupMessages(library(alakazam))
suppressPackageStartupMessages(library(shazam))
suppressPackageStartupMessages(library(tigger))
suppressPackageStartupMessages(library(dowser))
suppressPackageStartupMessages(library(airr))
suppressPackageStartupMessages(library(glmGamPoi))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dittoSeq))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(impactSingleCellToolkit))
# Immcantation Results
bcr_heavy_fh <- "data/results_immcantation/BCR_CSFPB_heavy_germ-pass.tsv"
bcr_light_fh <- "data/results_immcantation/BCR_CSFPB_light_germ-pass.tsv"
tcr_beta_fh <- "data/results_immcantation/TCR_CSFPB_heavy_germ-pass.tsv"
tcr_alpha_fh <- "data/results_immcantation/TCR_CSFPB_light_germ-pass.tsv"
# seed number
seed.number <- 12345
# Workshop specific functions
source("resources/functions_workshop.R")
At this stage of the analysis scRNA-Seq, BCR and TCR repertoire datasets have been processed and overlapped * Doublet were removed computationally using ‘DoubletFinder’ * cells with more than 10% mitochondrial gene expression were omitted * Only B cells and T cells exist in this object
rnaseq_obj <- readRDS("objects/seurat.obj.processed.rds")
# TCR
db.tra <- read_rearrangement(tcr_alpha_fh) %>% as.data.frame()
db.trb <- read_rearrangement(tcr_beta_fh) %>% as.data.frame()
# BCR
db.heavy <- read_rearrangement(bcr_heavy_fh) %>% as.data.frame()
db.light <- read_rearrangement(bcr_light_fh) %>% as.data.frame()
bcr_df <- mergeVDJresults(db.heavy,db.light,umi.thresh = 3,assay = "bcr")
tcr_df <- mergeVDJresults(db.trb,db.tra,umi.thresh = 2, assay = "tcr")
# format VDJ unique cell IDs to match scRNA-Seq (function only for this workshop)
formatUniqueIDcolum <- function(uni_id_column,sep = ":") {
samples <- tstrsplit(uni_id_column, sep)[[1]]
cells <- tstrsplit(uni_id_column, sep)[[2]]
fmt_ids <- paste(cells,samples,sep = "-")
return(fmt_ids)
}
bcr_df$unique_cell_id <- formatUniqueIDcolum(bcr_df$unique_cell_id)
tcr_df$unique_cell_id <- formatUniqueIDcolum(tcr_df$unique_cell_id)
# Add sample column
tcr_df$sample <- tstrsplit(tcr_df$unique_cell_id,"-")[[2]]
bcr_df$sample <- tstrsplit(bcr_df$unique_cell_id,"-")[[2]]
# Add column for CSF/PB
tcr_df$COMPARTMENT <- tstrsplit(tcr_df$sample,"_")[[2]]
bcr_df$COMPARTMENT <- tstrsplit(bcr_df$sample,"_")[[2]]
tcr_df$COMPARTMENT[tcr_df$COMPARTMENT %in% "PBMC"] <- "PB"
bcr_df$COMPARTMENT[bcr_df$COMPARTMENT %in% "PBMC"] <- "PB"
# Add Status Disease/Healthy
tcr_df$STATUS <- tstrsplit(tcr_df$sample,"_")[[1]] %>% as.character()
tcr_df$STATUS[tcr_df$STATUS %in% "32"] <- "Healthy"
tcr_df$STATUS[tcr_df$STATUS %in% "28"] <- "Disease"
bcr_df$STATUS <- tstrsplit(bcr_df$sample,"_")[[1]] %>% as.character()
bcr_df$STATUS[bcr_df$STATUS %in% "32"] <- "Healthy"
bcr_df$STATUS[bcr_df$STATUS %in% "28"] <- "Disease"
set.seed(seed.number)
rnaseq_obj <- scaleAndClusterSeuratObject(rnaseq_obj,dims = 1:5,npca = 5,resolution = 0.3,
normalize = F,dim.reduction = T)
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5316
## Number of edges: 154122
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9137
## Number of communities: 9
## Elapsed time: 0 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
p3_reclustered <- DimPlot(object = rnaseq_obj, group.by="main_bencode", pt.size=1.0, label.size = 10,label = T,reduction = "umap")
p4_reclustered <- DimPlot(object = rnaseq_obj, group.by="fine_bencode", pt.size=1.0,label.size = 10, label = T,reduction = "umap")
plot_grid(p3_reclustered,p4_reclustered,ncol = 2)
# Add compartment column
rnaseq_obj$compartment <- tstrsplit(rnaseq_obj$sample,"_")[[2]]
rnaseq_obj$compartment <- str_replace_all(rnaseq_obj$compartment,"PBMC","PB")
# Add patient status column
rnaseq_obj$status <- tstrsplit(rnaseq_obj$sample,"_")[[1]]
rnaseq_obj$status[rnaseq_obj$status %in% "32"] <- "Healthy"
rnaseq_obj$status[rnaseq_obj$status %in% "28"] <- "Disease"
# loc
loc_cols = c("CSF" = "blue", "PB" = "red")
diagnosis_cols = c("Disease" = "purple","Healthy" = "orange")
p1 <- DimPlot(object = rnaseq_obj, group.by="seurat_clusters", pt.size=1.0, label.size = 10, alpha = 0.7,label = T,reduction = "umap")
p2 <- DimPlot(object = rnaseq_obj, group.by="compartment", pt.size=1.0, label.size = 10, alpha = 0.7,label = F,cols = loc_cols,reduction = "umap")
p3 <- DimPlot(object = rnaseq_obj, group.by="status", pt.size=1.0, label.size = 10,alpha = 0.7,label = F,cols = diagnosis_cols,reduction = "umap")
plot_grid(p1,p2,p3,ncol = 3)
main_annot <- names(rnaseq_obj$orig.ident)
main_annot[main_annot %in% bcr_df$unique_cell_id] <- "B cells"
main_annot[main_annot %in% tcr_df$unique_cell_id] <- "T cells"
names(main_annot) <- names(rnaseq_obj$orig.ident)
rnaseq_obj$main.celltype.annot <- main_annot
p4 <- DimPlot(object = rnaseq_obj, group.by="seurat_clusters", pt.size=1.0, label.size = 10, alpha = 0.7,label = T,reduction = "umap")
p5 <- DimPlot(object = rnaseq_obj, group.by="main.celltype.annot", pt.size=1.0, label.size = 10, alpha = 0.7,label = T,reduction = "umap")
plot_grid(p4,p5, ncol = 2)
subtype_annotations <- colnames(rnaseq_obj)
Idents(rnaseq_obj) <- rnaseq_obj$seurat_clusters
p6 <- VlnPlot(rnaseq_obj,features = c("CD4","CD8A","CD8B"),pt.size = 0,ncol = 1)
plot_grid(p6,p4,ncol = 2,rel_heights = c(8,6),rel_widths = c(6,6))
# SingleR Encode annotatons
singler_annot_df <- rnaseq_obj@meta.data[,c("sample","seurat_clusters","main_bencode","fine_bencode","main.celltype.annot")]
# Manual T cell annotation based on CD8 expression
CD8_pos_tcells <- colnames(subset(rnaseq_obj, subset = CD8A > 0 | CD8B > 0))
singler_annot_df$cd8_pos <- row.names(singler_annot_df)
singler_annot_df$cd8_pos[singler_annot_df$cd8_pos %in% CD8_pos_tcells] <- "yes"
singler_annot_df$cd8_pos[! singler_annot_df$cd8_pos %in% "yes"] <- "no"
tcell_subtype_annotation <- names(rnaseq_obj$main.celltype.annot[rnaseq_obj$main.celltype.annot %in% "T cells"])
CD8_pos_tcells <- tcell_subtype_annotation[tcell_subtype_annotation %in% CD8_pos_tcells]
CD4_tcells <- tcell_subtype_annotation[! tcell_subtype_annotation %in% CD8_pos_tcells]
singler_annot_df$sub.celltype.annot <- singler_annot_df$main.celltype.annot
singler_annot_df[row.names(singler_annot_df) %in% CD8_pos_tcells, "sub.celltype.annot"] <- "CD8 T cells"
singler_annot_df[row.names(singler_annot_df) %in% CD4_tcells, "sub.celltype.annot"] <- "CD4 T cells"
# Stacked bar
# - Not all SingleR predictions for CD8+ T cells express CD8
summary_main_bencode <- singler_annot_df %>% count(seurat_clusters, main_bencode, name = "count")
levels(summary_main_bencode$main_bencode) <- sort(unique(summary_main_bencode$main_bencode))
colors_main_bencode <- c("red","darkgreen","darkblue","purple","orange","grey")
p7 <- ggplot(summary_main_bencode, aes(x = seurat_clusters, y = count, fill = main_bencode)) + geom_bar(stat = "identity",position = "fill") + scale_fill_manual(values = colors_main_bencode) + ggtitle("SingleR Cell-type Predictions")
summary_subtype <- singler_annot_df %>% count(seurat_clusters, sub.celltype.annot, name = "count")
levels(summary_subtype$sub.celltype.annot) <- sort(unique(summary_subtype$sub.celltype.annot))
colors_subtype <- c("red","darkgreen","darkblue")
p8 <- ggplot(summary_subtype, aes(x = seurat_clusters, y = count, fill = sub.celltype.annot)) + geom_bar(stat = "identity",position = "fill") + scale_fill_manual(values = colors_subtype) + ggtitle("Multiomics")
plot_grid(p7,p8,ncol = 1,align = "v")
# TCR
tcell_annotation_col <- rep("No Annot",nrow(tcr_df))
names(tcell_annotation_col) <- tcr_df$unique_cell_id
subtype_annotations_tcr <- singler_annot_df[singler_annot_df$main.celltype.annot %in% "T cells",]
for (subtype in unique(subtype_annotations_tcr$sub.celltype.annot)) {
subtype_cells <- row.names(subtype_annotations_tcr[subtype_annotations_tcr$sub.celltype.annot %in% subtype,])
tcell_annotation_col[names(tcell_annotation_col) %in% subtype_cells] <- subtype
}
tcr_df$SUBTYPE_ANNOTATION <- tcell_annotation_col
# BCR
bcell_annotation_col<- rep("No Annot",nrow(bcr_df))
names(bcell_annotation_col) <- bcr_df$unique_cell_id
subtype_annotations_bcr <- singler_annot_df[singler_annot_df$main.celltype.annot %in% "B cells",]
for (subtype in unique(subtype_annotations_bcr$sub.celltype.annot)) {
subtype_cells <- row.names(subtype_annotations_bcr[subtype_annotations_bcr$sub.celltype.annot %in% subtype,])
bcell_annotation_col[names(bcell_annotation_col) %in% subtype_cells] <- subtype
}
bcr_df$SUBTYPE_ANNOTATION <- bcell_annotation_col
clone_counts_tcr <- createCloneCountTable(tcr_df)
clone_counts_bcr <- createCloneCountTable(bcr_df)
# Can identify expanded clonotypes in the CSF with a high CSF:PB ratio
clone_count_subset <- clone_counts_tcr[clone_counts_tcr$cell_count > 1 & clone_counts_tcr$CSF_count > 1, ]
Highly expanded - cell count > 5 Moderate - 1 < cells count < 5 Non-expanded - cell count = 1
tcr_df <- addExpansionCategories(tcr_df,clone_counts_tcr)
bcr_df <- addExpansionCategories(bcr_df,clone_counts_bcr)
expansion_categ_col <- paste(colnames(rnaseq_obj),rnaseq_obj$main.celltype.annot)
uni_expansion_categ <- unique(tcr_df$EXPANSION_CATEGORY)
for (categ in unique(tcr_df$EXPANSION_CATEGORY)) {
cells_categ <- c()
cells_tcr <- tcr_df[tcr_df$EXPANSION_CATEGORY %in% categ,"unique_cell_id"]
cells_tcr <- unique(paste(cells_tcr,"T cells"))
cells_bcr <- bcr_df[bcr_df$EXPANSION_CATEGORY %in% categ,"unique_cell_id"]
cells_bcr <- unique(paste(cells_bcr,"B cells"))
if(length(cells_tcr)> 0) {cells_categ <- c(cells_categ, cells_tcr)}
if(length(cells_bcr)> 0) {cells_categ <- c(cells_categ, cells_bcr)}
expansion_categ_col[expansion_categ_col %in% cells_categ] <- categ
}
names(expansion_categ_col) <- colnames(rnaseq_obj)
rnaseq_obj$expansion_category <- expansion_categ_col
# add column for Diagnosis + Subtype & Diagnois + Compartment
rnaseq_obj$status.subtype <- paste(rnaseq_obj$status,rnaseq_obj$sub.celltype.annot)
rnaseq_obj$status.compartment <- paste(rnaseq_obj$status,rnaseq_obj$compartment)
cells <- names(rnaseq_obj$main.celltype.annot[rnaseq_obj$main.celltype.annot %in% "B cells"])
bcell_obj <- rnaseq_obj[,cells]
cells <- names(rnaseq_obj$main.celltype.annot[rnaseq_obj$main.celltype.annot %in% "T cells"])
tcell_obj <- rnaseq_obj[,cells]
save(rnaseq_obj,tcell_obj,bcell_obj,bcr_df,tcr_df,clone_counts_tcr,clone_counts_bcr,file = "objects/processed_objects.RData")